#!/usr/bin/python
# -*- coding: UTF-8 -*-

import ItemBaseCF as itemCF
import UserBaseCF as userCF

# Load the raw rating rows from the data file.
data_path = "/Volumes/Office/python-hadoop-demo/collaborative-filtering/data/u.data"
with open(data_path) as file_data:
    # Keep every line verbatim (trailing newline included); parsing happens later.
    lines = file_data.readlines()

# A single pre-formatted argument prints exactly what the multi-argument
# print statement did: the pieces separated by single spaces.
print("Succeed load the %d rows data" % len(lines))

# Reshape the raw rows into a nested mapping.
# Before (tab-separated table): userId, itemId, rating, timestamp
# After (nested dict):
# {
#     userId: {
#         itemId: rating,
#         itemId: rating,
#         ...
#     }
# }
# Ids and ratings are stored as the strings read from the file.
# NOTE(review): confirm the CF modules do not expect numeric ratings.
train = dict()
for line in lines:
    record = line.rstrip('\r\n')
    if not record.strip():
        # Blank lines (e.g. a trailing one at the end of the file) carry no rating.
        continue
    # The timestamp column is never used, so only the first three fields are
    # required; a row with fewer than three fields fails loudly here (ValueError).
    user_id, item_id, rating = record.split('\t')[:3]
    # A later row for the same (user, item) pair overwrites the earlier one.
    train.setdefault(user_id, dict())[item_id] = rating

# User-based collaborative-filtering recommendation.
print("==============================UserBaseCF==============================")
user_similarity = userCF.user_similarity(train)
# '1' is presumably the target user id and 100 a neighbour/size limit --
# TODO confirm both against UserBaseCF.user_recommend.
user_rank = userCF.user_recommend('1', train, user_similarity, 100)
# Order the (key, value) pairs by value, largest first, and show the first ten.
user_ranking = sorted(user_rank.items(), key=lambda pair: pair[1], reverse=True)
print(user_ranking[:10])

# Item-based collaborative-filtering recommendation.
print("==============================ItemBaseCF==============================")
item_similarity = itemCF.item_similarity(train)
# '1' is presumably the target user id and 100 a neighbour/size limit --
# TODO confirm both against ItemBaseCF.item_recommend.
item_rank = itemCF.item_recommend('1', train, item_similarity, 100)
# Order the (key, value) pairs by value, largest first, and show the first ten.
item_ranking = sorted(item_rank.items(), key=lambda pair: pair[1], reverse=True)
print(item_ranking[:10])
